{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7fe6cd6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import matplotlib.pylab as plt\n",
    "import numpy as np\n",
    "\n",
    "df = pd.read_csv(\"data/ssd_failure_tag.csv\")\n",
    "df_topo = pd.read_csv(\"data/location_info_of_ssd.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "349439f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped = df.groupby('machine_room_id')\n",
    "res_df = pd.DataFrame(columns=['machine_room_id','failure','all'])\n",
    "row = 0\n",
    "for col, group in grouped:\n",
    "    res_df.loc[row] = [col,group.shape[0], '']\n",
    "    \n",
    "    row = row + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3254a59e",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_top = pd.merge(df_topo, df, left_on='disk_id',right_on=\"disk_id\",how=\"inner\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "5103edb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "grouped2 = df_top.groupby('machine_room_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "36f39602",
   "metadata": {},
   "outputs": [],
   "source": [
    "row = 0\n",
    "for col, group in grouped2:\n",
    "    res_df.iloc[row, 2] = group.shape[0]\n",
    "    \n",
    "    row = row + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "684c61b1",
   "metadata": {},
   "outputs": [],
   "source": [
    "res_df['RFR'] = res_df['failure'] * 100 / res_df['all']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "408d3b99",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    machine_room_id failure  all        RFR\n",
      "0                 0       1    7  14.285714\n",
      "1                 1       1   11   9.090909\n",
      "2                 2       5   46  10.869565\n",
      "3                 3       1    2       50.0\n",
      "4                 4      10   70  14.285714\n",
      "..              ...     ...  ...        ...\n",
      "670             670      44  305   14.42623\n",
      "671             671       1   10       10.0\n",
      "672             672       7   16      43.75\n",
      "673             673       1    1      100.0\n",
      "674             674       1   11   9.090909\n",
      "\n",
      "[675 rows x 4 columns]\n"
     ]
    }
   ],
   "source": [
    "print(res_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c77449ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:title={'center':'Box Chart'}>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQq0lEQVR4nO3de4ycV33G8e+TjUMAm1yIFUIuGAoCI5dLYgEtLmBScYdEKgLSFgK4TaVCoIBUXNw2Qq2loFIoXbVILkYkpTWXgJrQQAVKTZClgroOKQ0siBAwJM1lgRBsiKlj//rHvGs2213bO7MXz9nvR1rNvOe9/WzZz5w9M3NOqgpJUltOWOoCJEnzz3CXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S71Icnrk+xa6jqk2RjuGipJvpfk/iT7ktyb5Pok5y7QvV6Y5EtJ9iaZSHJjklcs0L2el+T2hbi2lifDXcPo5VW1EjgLuBsYne8bJHkl8EngauAc4Ezgz4GXL8C9Tpzva0qGu4ZWVe0HrgGePNmW5JQkV3c97T1J/jTJCUlOT3J7kpd3x61McmuS102/bpIA7wP+oqo+VFX3VdWhqrqxqn5/2rHv7X6D+G6SF09pf0OS8a7Xf1uSP5iy73ldLe9MchewA/gc8OjuN5J9SR49z39dWmbsMWhoJXkY8Grgy1OaR4FTgMcBjwQ+D9xZVduTvBG4OslTgK3AzVV19QyXfiJwLr0XjiN5JnAVcAZwGbA9ydnVm9PjHuBlwG3Ac4DPJfnPqrqpO/dRwOnAY+h1sp4JfLSqzpnL34E0G8Ndw+hfkjwAPByYAF4IkGQEeA3wtKraC+xN8tfAa4HtVfX5JJ8EbqAXrE+Z5fqP7B7vPEode6rqH7p7XwX8Pb3hm7uq6vopx92Y5PPAbwCT4X4IuKKqftGdf2x/cukYOSyjYXRxVZ0KnAy8mV54PopeD3oFsGfKsXuAs6dsbwPWAR+pqh/Ncv3J9rOOUsddk0+q6ufd05UASV6c5MtJfpzkJ8BLuvomTXTDStKCMNw1tKrqYFV9GjgIbAB+CBygN9Qx6TzgDjjcs99G703SP0zy+Fku/S3gB8Bv9VNXkocAnwLeC5zZvRB9FpjaPZ8+HavTs2peGe4aWum5CDgNGK+qg8AngK1JViV5DPB24KPdKe+iF6JvBP6K3vj7yPTrdmPmbwf+rHtj9BHdm7Ibkmw7htJOAh5Cb8joge6N1hcc5Zy7gUcmOeUYri8dleGuYfSZJPuAn9J7Y/TSqvp6t+9y4Gf03sjcBfwz8OEkF9AL7Nd1LwLvoRf0m2e6QVVdQ+/N2jcC/0MvfP8SuPZoxXXj/W+h90JzL/DbwHVHOeeb9D41c1uSn/hpGQ0qLtYhSe2x5y5JDTLcJalBhrskNchwl6QGHRffUD3jjDNqzZo1S12GJA2V3bt3/7CqVs+077gI9zVr1jA2NrbUZUjSUEmyZ7Z9DstIUoMMd0lqkOEuSQ0y3CWpQYa7JDXoqOGe5MNJ7klyy5S205N8Icm3u8fTuvYk+dtu+bKvJTl/IYuXFsrIyAhJDv+MjPy/ySOl49qx9Nw/ArxoWttm4IaqegK9VW0mZ9Z7MfCE7ucy4IPzU6a0eEZGRjh06BArV65k9+7drFy5kkOHDhnwGipHDfeq+hLw42nNF9FbO5Lu8eIp7VdXz5eBU5McbTUb6bgyGex79+7l/PPPZ+/evYcDXhoW/Y65n1lVk+tL3kVv3UjoLWf2gynH3c6Dlzg7LMllScaSjE1MTPRZhrQwbrzxxiNuS8e7gd9Q7VatmfOk8FW1rarWV9X61atn/PastGSe+9znHnFbOt71G+53Tw63dI/3dO13AOdOOe6crk0aGieccAL79u1j1apV3HTTTaxatYp9+/Zxwgl+uEzDo99/rdcBl3bPL+WXS49dB7yu+9TMs4D7pgzfSEPh4MGDhwP+ggsuOBzsBw8eXOrSpGN21InDkuwAngeckeR24ArgSuATSTYBe4BXdYd/FngJcCvwc+ANC1CztOAMcg27o4Z7VV0yy64LZzi2gDcNWpQkaTAOIkpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0aKNyTvC3J15PckmRHkpOTPDbJV5LcmuTjSU6ar2KlxbJjxw7WrVvHyMgI69atY8eOHUtdkjQnfYd7krOBtwDrq2odMAK8BngP8P6qejxwL7BpPgqVFsuOHTvYsmULo6Oj7N+/n9HRUbZs2WLAa6gMOixzIvDQJCcCDwPuBJ4PXNPtvwq4eMB7SItq69atbN++nY0bN7JixQo2btzI9u3b2bp161KXJh2zvsO9qu4A3gt8n16o3wfsBn5SVQ90h90OnD3T+UkuSzKWZGxiYqLfMqR5Nz4+zoYNGx7UtmHDBsbHx5eoImnuBhmWOQ24CHgs8Gjg4cCLjvX8qtpWVeurav3q1av7LUOad2vXrmXXrl0Patu1axdr165dooqkuRtkWOY3ge9W1URVHQA+DTwbOLUbpgE4B7hjwBqlRbVlyxY2bdrEzp07OXDgADt37mTTpk1s2bJlqUuTjtmJRz9kVt8HnpXkYcD9wIXAGLATeCXwMeBS4NpBi5QW0yWXXALA5Zdfzvj4OGvXrmXr1q2H26VhkKrq/+Tk3cCrgQeArwK/R2+M/WPA6V3b71bVL450nfXr19fY2FjfdUjScpRkd1Wtn2nfID13quoK4IppzbcBzxjkupKkwfgNVUlqkOEuSQ0y3KUZOP2Aht1AY+5SiyanH9i+fTsbNmxg165dbNrUm0XDT8xoWAz0aZn54qdldDxZt24do6OjbNy48XDbzp07ufzyy7nllluWsDLpwY70aRnDXZpmZGSE/fv3s2LFisNtBw4c4OSTT+bgwYNLWJn0YEcKd8fcpWmcfkAtMNylaZx+QC3wDVVpGqcfUAscc5ekIeWYuyQtM4a7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXZuBKTBp2ThwmTeNKTGqBE4dJ07gSk4aFKzFJc+BKTBoWzgopzYErMakFhrs0jSsxqQW+oSpN40pMaoFj7pI0pBxzl6RlxnCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQQOGe5NQk1yT5ZpLxJL+W5PQkX0jy7e7xtPkqVloszgqpYTdoz/0DwL9V1ZOApwLjwGbghqp6AnBDty0NjclZIUdHR9m/fz+jo6Ns2bLFgNdQ6ftLTElOAW4GHldTLpLkW8DzqurOJGcBX6yqJx7pWn6JSccTZ4XUsFiQWSGTPA3YBnyDXq99N/BW4I6qOrU7JsC9k9vTzr8MuAzgvPPOu2DPnj191SHNN2eF1LBYqG+ongicD3ywqp4O/IxpQzBdj37GV4+q2lZV66tq/erVqwcoQ5pfzgqpFgwS7rcDt1fVV7rta+iF/d3dcAzd4z2DlSgtLmeFVAv6nhWyqu5K8oMkT6yqbwEX0hui+QZwKXBl93jtvFQqLRJnhVQLBpoVsht3/xBwEnAb8AZ6vw18AjgP2AO8qqp+fKTr+IaqJM3dkcbcB5rPvapuBma68IWDXFeSNBi/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSggWaFlFrVWyHywQaZHltabPbcpWmmBvtFF100Y7t0vLPnLs1iak/dYNewsecuzWBqj32mbel4N9Aye/PFZfZ0PJnspc/Ucz8e/r9Ik460zJ49d2kWSbj44osdktFQMtylaab2zq+99toZ26XjnW+oSjMwyDXs7LlLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBvlRSC0razZfvyj3+d6VL12U+0izMdy1rMw1dNdsvt6g1lByWEaSGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aOBwTzKS5KtJ/rXbfmySryS5NcnHk5w0eJmSpLmYj577W4HxKdvvAd5fVY8H7gU2zcM9JElzMFC4JzkHeCnwoW47wPOBa7pDrgIuHuQekqS5G7Tn/jfAHwOHuu1HAj+pqge67duBs2c6McllScaSjE1MTAxYhiRpqr7DPcnLgHuqanc/51fVtqpaX1XrV69e3W8ZkqQZDDK3zLOBVyR5CXAy8AjgA8CpSU7seu/nAHcMXqYkaS767rlX1Z9U1TlVtQZ4DfDvVfU7wE7gld1hlwLXznIJSdICWYjPub8TeHuSW+mNwW9fgHtIko5gXqb8raovAl/snt8GPGM+ritJ6o/fUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgE5e6AKlfT33357nv/gMLfp81m69f8Huc8tAV/NcVL1jw+2j5MNw1tO67/wDfu/KlS13GvFiMFxAtLw7LSFKDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAb1He5Jzk2yM8k3knw9yVu79tOTfCHJt7vH0+avXEnSsRik5/4A8I6qejLwLOBNSZ4MbAZuqKonADd025KkRdR3uFfVnVV1U/d8LzAOnA1cBFzVHXYVcPGANUqS5mhextyTrAGeDnwFOLOq7ux23QWcOcs5lyUZSzI2MTExH2VIkjoDh3uSlcCngD+qqp9O3VdVBdRM51XVtqpaX1XrV69ePWgZkqQpBgr3JCvoBfs/VdWnu+a7k5zV7T8LuGewEiVJczXIp2UCbAfGq+p9U3ZdB1zaPb8UuLb/8iRJ/RhkVshnA68F/jvJzV3bu4ArgU8k2QTsAV41UIWSpDnrO9yraheQWXZf2O91JUmD8xuqktQgw12SGuRKTBpaq9Zu5levauML0KvWArSxqpSOD4a7htbe8StdZk+ahcMyktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhrk9AMaaq18bf+Uh65Y6hLUGMNdQ2sx5pVZs/n6Zuav0fLisIwkNchwl6QGOSyjZaWfMfp+znEoR0vNcNeyYuhquXBYRpIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgVNVS10CSCWDPUtchzeAM4IdLXYQ0i8dU1eqZdhwX4S4dr5KMVdX6pa5DmiuHZSSpQYa7JDXIcJeObNtSFyD1wzF3SWqQPXdJapDhLkkNMty1rCU5mOTmJLck+UySU7v2NUnu7/ZN/pyU5PVJJrrtbyZ52xL/EaQZGe5a7u6vqqdV1Trgx8Cbpuz7Trdv8ud/u/aPV9XTgGcDW5Kcu8g1S0dluEu/9B/A2cd6cFX9CLgVOGvBKpL6ZLhLQJIR4ELguinNvzJlSObvZjjnPOBk4GuLVKZ0zFwgW8vdQ5PcTK/HPg58Ycq+73TDL9O9OslzgCcBb66q/QtepTRH9ty13N3fBfhjgPDgMffZfLyqngL8OnBlkkctYH1SXwx3CaiqnwNvAd6R5Jh+o62qMeAfgbcuZG1SPwx3qVNVX6U3fn7JHE57D/CGJKsWpiqpP04/IEkNsucuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KD/g8J3PKJWyirygAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "res_df['RFR'].plot.box(title=\"Box Chart\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "485b9b54",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "16.19047619047619"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res_df['RFR'].quantile()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e97cd74d",
   "metadata": {},
   "outputs": [],
   "source": [
    "Q1,Q3 = np.percentile(res_df['RFR'] , [25,75]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "138b30b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10.716177317213585\n"
     ]
    }
   ],
   "source": [
    "print(Q1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "3f8b55bd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25.0\n"
     ]
    }
   ],
   "source": [
    "print(Q3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0894779f",
   "metadata": {},
   "outputs": [],
   "source": [
    "IQR = Q3 - Q1 \n",
    "lower_range = Q1 - (1.5 * IQR) \n",
    "upper_range = Q3 + (1.5 * IQR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "c212b487",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-10.709556706966037\n"
     ]
    }
   ],
   "source": [
    "print(lower_range)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "5eedbd40",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "46.42573402417962\n"
     ]
    }
   ],
   "source": [
    "print(upper_range)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "28a27d27",
   "metadata": {},
   "outputs": [],
   "source": [
    "res_df.drop(res_df.index[(res_df['RFR'] <= upper_range)], inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "883e57d6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    machine_room_id failure all        RFR\n",
      "3                 3       1   2       50.0\n",
      "7                 7       3   6       50.0\n",
      "8                 8       4   8       50.0\n",
      "28               28       1   2       50.0\n",
      "44               44       1   2       50.0\n",
      "..              ...     ...  ..        ...\n",
      "641             641       1   2       50.0\n",
      "651             651       1   2       50.0\n",
      "664             664       1   2       50.0\n",
      "665             665       2   3  66.666667\n",
      "673             673       1   1      100.0\n",
      "\n",
      "[65 rows x 4 columns]\n"
     ]
    }
   ],
   "source": [
    "print(res_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "8471dbc2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9.62962962962963"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "65 / 675 * 100"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
